/***
This do-file retreives some numbers we use in the paper.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Small Business Revenue"

* Create required subfolders
cap mkdir "${root}/results/Small Business Revenue"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

*-------------------------------------------------------------------------------
**# 1. Clean Womply data  
*-------------------------------------------------------------------------------
project, uses("${root}/data/web/data/Womply - ZCTA - 2020.csv")
import delimited  "${root}/data/web/data/Womply - ZCTA - 2020.csv", clear

rename(revenue_all_apr2020) (change)

* Merge ZCTA-level covariates
project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge m:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", keep(1 3) nogen keepusing(pop_2014_2018_est med_2br_2014_2018_est)
rename (pop_2014_2018_est med_2br_2014_2018_est) (pop_2018 med_2br_2018)

* Convert the changes to percentages
gen revenue_change_100 = change * 100

* Winsorize changes 
foreach var in revenue_change_100 { 
	gen wins_`var' = `var'
	sum `var' [w = pop_2018], d 
	replace wins_`var' = `r(p99)' if `var' >= `r(p99)' & !mi(`var')
}

* Merge counties (for SEs)
rename zcta zcta5
preserve
	project, uses("${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt")
	import delimited "${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt", encoding(ISO-8859-2) clear
	sort zcta5 poppt
	bysort zcta5 (poppt): keep if _n == _N // only keeps the county where most of the zcta lives
	gen county_fips = 1000*state + county
	keep zcta5 county_fips
	tempfile zcta_county_CW
	save `zcta_county_CW'
restore

merge m:1 zcta5 using `zcta_county_CW', keep(1 3) nogen

tempfile womply 
save `womply'

********************************************************************************
**# 2. Clean Safegraph data 
********************************************************************************

*==============================================================================*
**## 2.1. Safegraph
*==============================================================================*

* Read Safegraph data 
foreach month in jan apr {
	project, uses("${root}/data/dvc/Safegraph/home_panel_summary-`month'_20.csv") 
	import delimited "${root}/data/dvc/Safegraph/home_panel_summary-`month'_20.csv", clear
	
	gisid census_block_group
	keep census_block_group number_devices_residing
	rename number_devices_residing number_devices_residing_`month'

	tempfile safe_graph_`month'
	save `safe_graph_`month''
}

* Merge datasets (wide)
use `safe_graph_jan', clear 
merge 1:1 census_block_group using `safe_graph_apr', keep(match) nogen 

* Collapse to tract level 
gen double tract_id = int(census_block_group / 10)
gcollapse (rawsum) number_devices_residing*, by(tract_id)

tempfile safegraph 
save `safegraph'

*==============================================================================*
**## 2.2. Tract to ZIP crosswalk
*==============================================================================*

* Crosswalk from Census (can't use 2020 version because no tract X zip pop share) 
* https://www.census.gov/programs-surveys/geography/technical-documentation/records-layout/2010-zcta-record-layout.html
project, uses("${root}/data/dvc/Crosswalks/zcta_tract_rel_10.txt")
import delimited "${root}/data/dvc/Crosswalks/zcta_tract_rel_10.txt", clear

* Merge to Safegraph 
gisid zcta5 geoid
rename geoid tract_id
merge m:1 tract_id using `safegraph', keep(match)

* Tracts and ZIPs have a many-to-many relationship
* Thus calculate no. of devices in each tract X ZIP cell 
* by multiplying no. of devices by the proportion of tract population that is in the tract X ZIP cell
foreach month in jan apr {
	gen num_devices_`month' = number_devices_residing_`month' * 0.01 * trpoppct
}

* Collapse to ZIP level 
gcollapse (rawsum) num_devices_jan num_devices_apr, by(zcta5)

* % change in number of devices 
gen pct_change_devices_jan_apr = 100 * (num_devices_apr/ num_devices_jan - 1)

tempfile safegraph_zip 
save `safegraph_zip'

********************************************************************************
**# 3. Rent gradients
********************************************************************************

use `womply', clear 
merge 1:1 zcta5 using `safegraph_zip', keep(1 3) nogen

* Without controlling for change in pop
reg wins_revenue_change_100 med_2br_2018 [w = pop_2018], vce(cluster county_fips)

* Controlling for change in pop 	
reg wins_revenue_change_100 med_2br_2018 pct_change_devices_jan_apr [w = pop_2018], vce(cluster county_fips)

* Stats for paper 
local smallbiz_rent_safegraph: di %3.1f abs(_b[med_2br_2018] * 1000)
local smallbiz_rent_safegraph_se: di %3.2f _se[med_2br_2018] * 1000

* Export
cap erase "${root}/results/paper numbers/`category'/Small Business Revenue Rent Gradient with Safegraph Controls.yaml"

yamlout using "${root}/results/paper numbers/`category'/Small Business Revenue Rent Gradient with Safegraph Controls.yaml", ///
	key("smallbiz_rent_safegraph") ///
	value(`smallbiz_rent_safegraph') fmt(%3.1f)
	
yamlout using "${root}/results/paper numbers/`category'/Small Business Revenue Rent Gradient with Safegraph Controls.yaml", ///
	key("smallbiz_rent_safegraph_se") ///
	value(`smallbiz_rent_safegraph_se') fmt(%3.2f)

project, creates("${root}/results/paper numbers/`category'/Small Business Revenue Rent Gradient with Safegraph Controls.yaml")
